cd "D:\Dropbox\book_welfare\replication"


************************************************
* Table "in": descriptive regressions, column 1
* Bookstat, vintage (pyear) x sales-year level
************************************************

* Title counts per vintage: male-named titles, titles with an identified
* name, and all titles.
use data\bookstat_gender_pyear_genre_asin.dta, clear
collapse (sum) males=N_male name_present=N_name total, by(pyear)
tempfile number
save `number'

* Sales per vintage and sales year, with the vintage-level counts attached.
use data\bookstat_sales_gender_pyear_genre_asin.dta, clear
collapse (sum) q qm=q_male qname=q_name, by(pyear year)
merge m:1 pyear using `number'

* Estimation window: sales years 2018-2021, vintages 1960-2021.
keep if inrange(year, 2018, 2021)
keep if inrange(pyear, 1960, 2021)

* Shares among observations with an identified name.
gen rv_in = 1 - males/name_present
gen sf_in = 1 - qm/qname

* Conservative shares: non-male identified names over everything.
gen rv_all = (name_present - males)/total
gen sf_all = (qname - qm)/q

label var rv_in  "female-authored share of new products (in)"
label var sf_in  "share of vintage sales (in)"
label var rv_all "female-authored share of new products (all)"
label var sf_all "share of vintage sales (all)"

* This table uses the identified-name versions.
gen sf = sf_in
gen rv = rv_in

* Start a fresh set of stored estimates for this table.
eststo clear

eststo: reghdfe sf rv [weight=qname], absorb(year)


*********************************
* Table "in", column 2
* Goodreads, vintage (pubyr) x year level
*********************************

* Counts of non-missing q observations per vintage and author-gender flag.
use data\books_year.dta, clear
keep if inrange(year, 2007, 2016)
keep if inrange(pubyr, 1960, 2016)
* Rows with unknown gender are recoded to fbookp==0, so they stay in the
* count denominator (the conservative female share).
* NOTE(review): the sales file below drops unknown-gender rows instead, and
* the genre-level Goodreads block of this table drops them from the counts
* too. Confirm that mixing the two treatments here is intended.
replace fbookp = 0 if fbookp == .
collapse (count) n=q, by(pubyr fbookp)
tempfile gr
save `gr'

* Consumption per year, vintage and author-gender flag.
use data\books_year.dta, clear
keep if inrange(year, 2007, 2016)
keep if inrange(pubyr, 1960, 2016)
drop if fbookp == .
* replace fbook=0 if fbook==.
collapse (sum) qf qm q, by(year pubyr fbookp)
merge m:1 pubyr fbookp using `gr'

* Cell totals over the author-gender flag within vintage and year.
egen Qv  = sum(q),  by(pubyr year)
egen Nv  = sum(n),  by(pubyr year)
egen QFv = sum(qf), by(pubyr year)
egen QMv = sum(qm), by(pubyr year)

* Each row's share of its cell total.
gen sv  = q/Qv
gen rv  = n/Nv
gen sfv = qf/QFv
gen smv = qm/QMv

* Vintage-level total of n; not used by the regression below.
egen N = sum(n), by(pubyr)

label var sv  "female-authored share of consumption from a vintage  (Goodreads)"
label var rv  "female-authored share of new products from a vintage"
label var sfv "fem-auth shr of female cons"
label var smv "fem-auth shr of male cons"

* Female-author rows only, year effects absorbed.
eststo: areg sv rv if fbookp==1 [weight=Qv], absorb(year)




**********************************
* Table "in", column 3
* Bookstat, vintage x sales-year x genre level
**********************************

* Title counts per vintage and genre.
use data\bookstat_gender_pyear_genre_asin.dta, clear
collapse (sum) males=N_male name_present=N_name total, by(pyear genre)
tempfile number
save `number'

* Sales per vintage, sales year and genre, with the counts attached.
use data\bookstat_sales_gender_pyear_genre_asin.dta, clear
collapse (sum) q qm=q_male qname=q_name, by(pyear year genre)
merge m:1 pyear genre using `number'

* Estimation window: sales years 2018-2021, vintages 1960-2021.
keep if inrange(year, 2018, 2021)
keep if inrange(pyear, 1960, 2021)

* Shares among observations with an identified name: the denominators are
* name_present and qname, not the totals.
gen rv = (name_present - males)/name_present
gen sf = (qname - qm)/qname

label var rv "female-authored share of new products"
label var sf "share of vintage sales"

* Numeric genre id for the absorbed genre effect.
egen gno = group(genre)
eststo: reghdfe sf rv [weight=qname], absorb(year gno pyear)
	

*********************************
* Table "in", columns 4-6
* Goodreads regressions - by year, pubyr and genre
*********************************

* Counts of non-missing q observations per vintage, author-gender flag and
* genre. Unknown-gender rows are dropped.
use data\books_year.dta, clear
keep if year>=2007 & year<=2016
keep if pubyr>=1960 & pubyr<=2016
drop if fbookp==.
* replace fbook=0 if fbook==.
collapse (count) n=q, by(pubyr fbookp genre1)
tempfile gr
save `gr'

* Consumption per year, vintage, author-gender flag and genre.
use data\books_year.dta, clear
keep if year>=2007 & year<=2016
keep if pubyr>=1960 & pubyr<=2016
drop if fbookp==.
* replace fbook=0 if fbook==.
collapse (sum) qf qm q, by(year pubyr fbookp genre1)
merge m:1 pubyr fbookp genre1 using `gr'

* Numeric genre id for the absorbed genre effect.
egen gno = group(genre1)

* Cell totals over the author-gender flag within genre, year and vintage,
* and each row's share of its cell.
egen Qv = sum(q), by(genre1 year pubyr)
gen sv = q/Qv

egen Nv = sum(n), by(genre1 year pubyr)
gen rv = n/Nv

egen QFv = sum(qf), by(genre1 year pubyr)
gen sfv = qf/QFv
egen QMv = sum(qm), by(genre1 year pubyr)
gen smv = qm/QMv

* N and gy are not used by the regressions below.
egen N = sum(n), by(pubyr)

label var sv "female-authored share of consumption"
label var rv "female-authored share of new products"
label var sfv "fem-auth shr of female cons"
label var smv "fem-auth shr of male cons"

gen gy = gno*10000 + year

* The only gender flag left after the collapse is fbookp. It is spelled out
* here, as in the vintage-level Goodreads regression, so the commands do not
* depend on variable abbreviation being switched on.
eststo: reghdfe sv rv if fbookp==1 [weight=Qv], absorb(gno year pubyr)
eststo: reghdfe sfv rv if fbookp==1 [weight=QFv], absorb(gno year pubyr)
eststo: reghdfe smv rv if fbookp==1 [weight=QMv], absorb(gno year pubyr)




**********************************
* Genre growth rates - Bookstat
* Slope of each genre's share of vintage sales on vintage year
**********************************

use data\bookstat_sales_gender_pyear_genre_asin.dta, clear
collapse (sum) q qm=q_male qname=q_name, by(pyear genre)
keep if inrange(pyear, 1960, 2021)

* Genre share of all sales from the same vintage.
egen Q = sum(q), by(pyear)
gen s = q/Q

* Genre-by-genre trend regressions, for inspection only.
bysort genre: reg s pyear
gen lq = ln(q)

egen QQ = sum(q), by(genre)
gen genre1 = genre
* replace genre1 = "other" if QQ<2.5e+07

* Numeric genre id, plus a genre <-> gno lookup saved for the merge below.
egen gno = group(genre)
preserve
	collapse (mean) gno, by(genre)
	tempfile genre
	save `genre'
restore

* Pooled regression with genre intercepts and genre-specific slopes.
reg s i.gno#c.pyear i.gno

* Replace the data in memory with the parameter estimates.
parmest, norestore

* Slope parameters are named <gno>.gno#c.pyear (or <gno>b.gno#c.pyear).
* Only for those is the leading piece a number, so every other parameter
* gets a missing gno and is dropped.
split parm, parse(".gno#c." "b.gno#c.")
gen gno = real(parm1)
drop if gno == .

merge 1:1 gno using `genre'

graph bar (mean) estimate if estimate != ., ///
	over(genre, sort(estimate) descending label(angle(forty_five) labsize(vsmall))) ///
	scheme(lean2) yline(1) ytitle(annual sales share growth)
* graph export "D:\Dropbox\book_welfare\latex_text\figures\genre_sales_growth.pdf", as(pdf) name("Graph") replace

* One slope per genre, used to split genres in the next section.
keep genre estimate
tempfile grow
save `grow'


****************************************************
* Table "in", columns 7-8
* Genre-level Bookstat regression, split at the median genre growth rate
****************************************************

* Title counts per vintage and genre.
use data\bookstat_gender_pyear_genre_asin.dta, clear
collapse (sum) males=N_male name_present=N_name total, by(pyear genre)
tempfile number
save `number'

* Sales per vintage, sales year and genre, with the counts attached.
use data\bookstat_sales_gender_pyear_genre_asin.dta, clear
collapse (sum) q qm=q_male qname=q_name, by(pyear year genre)
merge m:1 pyear genre using `number'

* Shares among observations with an identified name. In this table rv/sf
* use the same definition as rv_old/sf_old.
gen rv_old = (name_present - males)/name_present
gen sf_old = (qname - qm)/qname

gen rv = (name_present - males)/name_present
gen sf = (qname - qm)/qname

keep if year>=2018 & year<=2021
keep if pyear>=1960 & pyear<=2021

* Attach each genre's estimated growth slope.
drop _merge
merge m:1 genre using `grow'

label var sf "share of vintage sales"
label var rv "female-authored share of new products"

egen gno = group(genre)

* Unweighted pooled regression, for inspection only (not stored).
reghdfe sf rv, absorb(year gno pyear)

* Median growth slope over the observations in memory. It is held in a
* scalar rather than a local macro, because a macro stores the number as
* text and may round it, which matters for genres sitting at the median.
su estimate, de
tempname med
scalar `med' = r(p50)

* Declining: strictly below the median. Growing: at or above it, matching
* the split used for the "all" table. Missing slopes compare as larger than
* any number in Stata, so they are excluded explicitly from the second
* group.
eststo: reghdfe sf rv if estimate < scalar(`med') [weight=qname], absorb(year gno pyear)
eststo: reghdfe sf rv if estimate >= scalar(`med') & !missing(estimate) [weight=qname], absorb(year gno pyear)




***********************************************
* Write the "in" table: once to the Results window, once to LaTeX.
* Column titles follow the order in which the eight models were stored.
***********************************************
local pattern prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})

esttab, noomitted ///
	mtitles("BS (tv)"  "GR (tv)"  "BS (tvg)"  "GR (tvg)" "GR fem (tvg)" "GR men (tvg)" "declining"  "growing") ///
	drop( _cons) se replace label nonotes ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	scalars("r2_a $\overline{R^2}$") nocons

esttab using "latex_text\tables\descriptive_regression_asin_in_weighted.tex", noomitted ///
	mtitles("BS (tv)"  "GR (tv)"  "BS (tvg)"  "GR (tvg)" "GR fem (tvg)" "GR men (tvg)" "declining"  "growing") ///
	drop( _cons) se replace label nonotes ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	scalars("r2_a $\overline{R^2}$") nocons booktabs
	

***********************************************
* Table "all": same sequence of regressions, with the conservative
* female shares (denominators are all titles / all sales)
***********************************************

cd "D:\Dropbox\book_welfare\replication"

* --- Column 1: Bookstat, vintage (pyear) x sales-year level ---

* Title counts per vintage, restricted to vintages 1960-2021.
use data\bookstat_gender_pyear_genre_asin.dta, clear
keep if inrange(pyear, 1960, 2021)
collapse (sum) males=N_male name_present=N_name total, by(pyear)
tempfile number
save `number'

* Sales per vintage and sales year within the estimation window.
use data\bookstat_sales_gender_pyear_genre_asin.dta, clear
keep if inrange(year, 2018, 2021)
keep if inrange(pyear, 1960, 2021)
collapse (sum) q qm=q_male qname=q_name, by(pyear year)
merge m:1 pyear using `number'

* The window is applied again after the merge. Rows that came only from the
* count file have a missing year and are removed here.
keep if inrange(year, 2018, 2021)
keep if inrange(pyear, 1960, 2021)

* Shares among observations with an identified name.
gen nf_in = 1 - males/name_present
gen sf_in = 1 - qm/qname

* Conservative shares: non-male identified names over everything.
gen nf_all = (name_present - males)/total
gen sf_all = (qname - qm)/q

label var nf_in  "female-authored share of new products (in)"
label var sf_in  "share of vintage sales (in)"
label var nf_all "female-authored share of new products (all)"
label var sf_all "share of vintage sales (all)"

* This table uses the conservative versions.
gen sv = sf_all
gen rv = nf_all

* Start a fresh set of stored estimates for this table.
eststo clear

eststo: reghdfe sv rv [weight=q], absorb(year)

		

*********************************
* Table "all", column 2
* Goodreads, vintage (pubyr) x year level
*********************************

* Counts of non-missing q observations per vintage and author-gender flag.
* Unknown gender is recoded to fbookp==0 so it stays in the denominator
* (the conservative female share).
use data\books_year.dta, clear
keep if inrange(year, 2007, 2016)
keep if inrange(pubyr, 1960, 2016)
replace fbookp = 0 if fbookp == .
collapse (count) n=q, by(pubyr fbookp)
tempfile gr
save `gr'

* Consumption per year, vintage and author-gender flag, with the same
* recoding of unknown gender.
use data\books_year.dta, clear
keep if inrange(year, 2007, 2016)
keep if inrange(pubyr, 1960, 2016)
replace fbookp = 0 if fbookp == .
collapse (sum) qf qm q, by(year pubyr fbookp)
merge m:1 pubyr fbookp using `gr'

* Cell totals over the author-gender flag within vintage and year.
egen Qv  = sum(q),  by(pubyr year)
egen Nv  = sum(n),  by(pubyr year)
egen QFv = sum(qf), by(pubyr year)
egen QMv = sum(qm), by(pubyr year)

* Each row's share of its cell total.
gen sv  = q/Qv
gen rv  = n/Nv
gen sfv = qf/QFv
gen smv = qm/QMv

* Vintage-level total of n; not used by the regression below.
egen N = sum(n), by(pubyr)

label var sv  "female-authored share of consumption from a vintage  (Goodreads)"
label var rv  "female-authored share of new products from a vintage"
label var sfv "fem-auth shr of female cons"
label var smv "fem-auth shr of male cons"

* Female-author rows only, year effects absorbed.
eststo: areg sv rv if fbookp==1 [weight=Qv], absorb(year)




**********************************
* Table "all", column 3
* Bookstat, vintage x sales-year x genre level
**********************************

* Title counts per vintage and genre.
use data\bookstat_gender_pyear_genre_asin.dta, clear
collapse (sum) males=N_male name_present=N_name total, by(pyear genre)
tempfile number
save `number'

* Sales per vintage, sales year and genre, with the counts attached.
use data\bookstat_sales_gender_pyear_genre_asin.dta, clear
collapse (sum) q qm=q_male qname=q_name, by(pyear year genre)
merge m:1 pyear genre using `number'

* Estimation window: sales years 2018-2021, vintages 1960-2021.
keep if inrange(year, 2018, 2021)
keep if inrange(pyear, 1960, 2021)

* Identified-name shares, kept for reference only.
gen rv_old = 1 - males/name_present
gen sf_old = 1 - qm/qname

* Conservative shares used in the regression: non-male identified names
* over all titles / all sales.
gen rv = (name_present - males)/total
gen sf = (qname - qm)/q

label var rv "female-authored share of new products"
label var sf "share of vintage sales"

* Numeric genre id for the absorbed genre effect.
egen gno = group(genre)
eststo: reghdfe sf rv [weight=q], absorb(year gno pyear)
	

*********************************
* Table "all", column 4
* Goodreads, year x vintage (pubyr) x genre level
*********************************

* Counts of non-missing q observations per vintage, author-gender flag and
* genre. Unknown gender is recoded to fbookp==0.
use data\books_year.dta, clear
keep if inrange(year, 2007, 2016)
keep if inrange(pubyr, 1960, 2016)
replace fbookp = 0 if fbookp == .
collapse (count) n=q, by(pubyr fbookp genre1)
tempfile gr
save `gr'

* Consumption per year, vintage, author-gender flag and genre, with the same
* recoding of unknown gender.
use data\books_year.dta, clear
keep if inrange(year, 2007, 2016)
keep if inrange(pubyr, 1960, 2016)
replace fbookp = 0 if fbookp == .
collapse (sum) qf qm q, by(year pubyr fbookp genre1)
merge m:1 pubyr fbookp genre1 using `gr'

* Numeric genre id for the absorbed genre effect.
egen gno = group(genre1)

* Cell totals over the author-gender flag within genre, year and vintage.
egen Qv  = sum(q),  by(genre1 year pubyr)
egen Nv  = sum(n),  by(genre1 year pubyr)
egen QFv = sum(qf), by(genre1 year pubyr)
egen QMv = sum(qm), by(genre1 year pubyr)

* Each row's share of its cell total.
gen sv  = q/Qv
gen rv  = n/Nv
gen sfv = qf/QFv
gen smv = qm/QMv

* N and gy are not used by the regression below.
egen N = sum(n), by(pubyr)

label var sv  "female-authored share of consumption"
label var rv  "female-authored share of new products"
label var sfv "fem-auth shr of female cons"
label var smv "fem-auth shr of male cons"

gen gy = gno*10000 + year

* Female-author rows only; genre, year and vintage effects absorbed.
eststo: reghdfe sv rv if fbookp==1 [weight=Qv], absorb(gno year pubyr)





**********************************
* find growing genres - Bookstat  
**********************************
* Estimates one slope per genre of the sales share s on vintage year, draws
* and exports a bar chart of the slopes, and saves genre + slope in the
* tempfile grow for the growth-split regressions that follow.



use  data\bookstat_sales_gender_pyear_genre_asin.dta, clear 

		* NOTE(review): this collapse keeps year, while the corresponding
		* section of the "in" table collapses by(pyear genre) only. Q below is
		* summed by pyear alone, so s here is a genre-year cell's share of all
		* sales from the vintage across years. Confirm this is intended.
		collapse (sum) q qm=q_male qname=q_name, by(pyear year genre )
		

		
		keep if pyear>=1960 & pyear<=2021
		* qf is not used later in this section
		gen qf = qname - qm 
		
		egen Q=sum(q), by(pyear)
		gen s=q/Q
		

		
		* QQ and genre1 are only referenced by the commented-out line below
		egen QQ=sum(q), by(genre)
		gen genre1=genre 
		* replace genre1 = "other" if QQ<2.5e+07
			
		* save a genre <-> gno lookup so slopes can be matched back to genre
		preserve 
			egen gno=group(genre)
			collapse (mean) gno, by(genre) 
			tempfile genre 
			save `genre'
		restore 



	
		* pooled regression with genre intercepts and genre-specific slopes
		egen gno=group(genre)
		reg s i.gno#c.pyear i.gno 
				
				
		* replaces the data in memory with the parameter estimates
		parmest, norestore 


			* parm1 is the piece before .gno#c. (or b.gno#c.); it converts to
			* a number only for the slope terms, so all other parameters get a
			* missing gno and are dropped
			split parm, parse(".gno#c." "b.gno#c.")
			gen gno=real(parm1)
			drop if gno==. 

			merge 1:1 gno using `genre'

		graph bar (mean) estimate if estimate~=., over(genre, sort(estimate) descending label(angle(forty_five) labsize(vsmall))) scheme(lean2) yline(1) ytitle(annual sales share growth)
		graph export latex_text\figures\genre_sales_growth.pdf, as(pdf) name("Graph") replace 


* one slope per genre, merged onto the regression data in the next section
keep genre estimate 
tempfile grow 
save `grow'


****************************************************
* Table "all", columns 5-6
* Genre-level Bookstat regression, split at the median genre growth rate
****************************************************

* Title counts per vintage and genre.
use data\bookstat_gender_pyear_genre_asin.dta, clear
collapse (sum) males=N_male name_present=N_name total, by(pyear genre)
tempfile number
save `number'

* Sales per vintage, sales year and genre, with the counts attached.
use data\bookstat_sales_gender_pyear_genre_asin.dta, clear
collapse (sum) q qm=q_male qname=q_name, by(pyear year genre)
merge m:1 pyear genre using `number'

* Identified-name shares, kept for reference only.
gen rv_old = (name_present - males)/name_present
gen sf_old = (qname - qm)/qname

* Conservative shares used in the regressions: non-male identified names
* over all titles / all sales.
gen rv = (name_present - males)/total
gen sf = (qname - qm)/q

keep if year>=2018 & year<=2021
keep if pyear>=1960 & pyear<=2021

* Attach each genre's estimated growth slope.
drop _merge
merge m:1 genre using `grow'

label var sf "share of vintage sales"
label var rv "female-authored share of new products"

egen gno = group(genre)

* Unweighted pooled regression, for inspection only (not stored).
reghdfe sf rv, absorb(year gno pyear)

* Median growth slope over the observations in memory. It is held in a
* scalar rather than a local macro, because a macro stores the number as
* text and may round it, which matters for genres sitting at the median.
su estimate, de
tempname med
scalar `med' = r(p50)

* Declining: strictly below the median. Growing: at or above it. Missing
* slopes compare as larger than any number in Stata, so they are excluded
* explicitly from the second group.
eststo: reghdfe sf rv if estimate < scalar(`med') [weight=q], absorb(year gno pyear)
eststo: reghdfe sf rv if estimate >= scalar(`med') & !missing(estimate) [weight=q], absorb(year gno pyear)



********************************************


*********************************
* Table "all", columns 7-8
* Goodreads regressions - by year, pubyr and genre, split by reader gender
*********************************

* Counts of non-missing q observations per vintage, author-gender flag and
* genre. Unknown gender is recoded to fbookp==0.
use data\books_year.dta, clear
keep if year>=2007 & year<=2016
keep if pubyr>=1960 & pubyr<=2016
replace fbookp=0 if fbookp==.
collapse (count) n=q, by(pubyr fbookp genre1)
tempfile gr
save `gr'

* Consumption per year, vintage, author-gender flag and genre, with the same
* recoding of unknown gender.
use data\books_year.dta, clear
keep if year>=2007 & year<=2016
keep if pubyr>=1960 & pubyr<=2016
replace fbookp=0 if fbookp==.
collapse (sum) qf qm q, by(year pubyr fbookp genre1)
merge m:1 pubyr fbookp genre1 using `gr'

* Numeric genre id for the absorbed genre effect.
egen gno = group(genre1)

* Cell totals over the author-gender flag within genre, year and vintage,
* and each row's share of its cell.
egen Qv = sum(q), by(genre1 year pubyr)
gen sv = q/Qv

egen Nv = sum(n), by(genre1 year pubyr)
gen rv = n/Nv

egen QFv = sum(qf), by(genre1 year pubyr)
gen sfv = qf/QFv
egen QMv = sum(qm), by(genre1 year pubyr)
gen smv = qm/QMv

* N and gy are not used by the regressions below.
egen N = sum(n), by(pubyr)

label var sv "female-authored share of consumption"
label var rv "female-authored share of new products"
label var sfv "fem-auth shr of female cons"
label var smv "fem-auth shr of male cons"

gen gy = gno*10000 + year

* The only gender flag left after the collapse is fbookp. It is spelled out
* here, as in the other Goodreads regressions of this table, so the commands
* do not depend on variable abbreviation being switched on.
eststo: reghdfe sfv rv if fbookp==1 [weight=QFv], absorb(gno year pubyr)
eststo: reghdfe smv rv if fbookp==1 [weight=QMv], absorb(gno year pubyr)
***********************************************
* Write the "all" table: once to the Results window, once to LaTeX.
* Column titles follow the order in which the eight models were stored.
***********************************************
local pattern prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})

esttab, noomitted ///
	mtitles("BS (tv)"  "GR (tv)"  "BS (tvg)"  "GR (tvg)"  "declining"  "growing" "GR fem (tvg)" "GR men (tvg)") ///
	drop( _cons) se replace label nonotes ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	scalars("r2_a $\overline{R^2}$") nocons

esttab using "latex_text\tables\descriptive_regression_asin_all_weighted.tex", noomitted ///
	mtitles("BS (tv)"  "GR (tv)"  "BS (tvg)"  "GR (tvg)"  "declining"  "growing" "GR fem (tvg)" "GR men (tvg)") ///
	drop( _cons) se replace label nonotes ///
	star(* 0.10 ** 0.05 *** 0.01) ///
	scalars("r2_a $\overline{R^2}$") nocons booktabs
	
